package com.springdeveloper.hadoop;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.type.TypeReference;
/**
 * Mapper that emits a {@code (hashtag text, 1)} pair for every hashtag entry found in a tweet.
 *
 * <p>Each input value is parsed as a single JSON object. The mapper looks up the list stored
 * under {@code entities.hashTags} and writes the {@code text} of every entry as the output key,
 * paired with the constant count {@code 1}. Records without usable hashtag data produce no
 * output.
 *
 * <p>NOTE(review): the lookup key is spelled {@code "hashTags"}; Twitter's native API JSON
 * spells it {@code "hashtags"}. Confirm the spelling against the producer of the input files.
 */
public class TweetCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** Shared count value written for every hashtag occurrence. */
    private final static IntWritable ONE = new IntWritable(1);

    /** JSON parser, created once per mapper instance and reused for every record. */
    private final ObjectMapper mapper = new ObjectMapper(new JsonFactory());

    /**
     * Parses one JSON-encoded tweet and writes {@code (hashtag text, 1)} for each hashtag entry.
     *
     * <p>Nothing is written when the line is blank, when the JSON value is {@code null}, when
     * {@code entities} or {@code hashTags} is absent or has an unexpected type, or when an
     * individual entry is not an object or has no {@code text} value.
     *
     * @param key     the input key supplied by the framework; not used
     * @param value   one input record, expected to hold a single JSON object
     * @param context the task context that receives the emitted pairs
     * @throws IOException          if a non-blank value cannot be parsed as a JSON object, or
     *                              if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String json = value.toString();
        if (json.trim().isEmpty()) {
            // A blank line carries no tweet; handing it to the parser would fail the task.
            return;
        }

        Map<String, Object> tweet = mapper.readValue(json,
                new TypeReference<HashMap<String, Object>>(){});
        if (tweet == null) {
            // The JSON literal "null" is deserialized to a null reference.
            return;
        }

        // instanceof checks replace the former unchecked casts, so unexpectedly shaped JSON
        // is skipped instead of raising ClassCastException or NullPointerException.
        Object entities = tweet.get("entities");
        if (!(entities instanceof Map)) {
            return;
        }

        Object hashTagEntries = ((Map<?, ?>) entities).get("hashTags");
        if (!(hashTagEntries instanceof List)) {
            return;
        }

        for (Object hashTagEntry : (List<?>) hashTagEntries) {
            if (!(hashTagEntry instanceof Map)) {
                continue;
            }
            Object hashTag = ((Map<?, ?>) hashTagEntry).get("text");
            if (hashTag == null) {
                // An entry without text has nothing to count.
                continue;
            }
            context.write(new Text(hashTag.toString()), ONE);
        }
    }
}